################################################################################
# Title   : Replication Code — Union Spillovers Paper
# Project : The Impact of Unions on Non-union Wage Setting: Threats and Bargaining
# Authors : David A. Green, Ben M. Sand, Iain G. Snoddy, Jeanne Tschopp
# Output  : Appendix Figure 4
# Date    : August 2025
################################################################################


# Data ----
# Read the regression data set from `wd`.
dat <- read_dta(
  file.path(wd, "/reg_dataRR3_app_fig_detind_city_plus2_All_case_0.dta")
)

# Keep only the variables used for this figure.
dat <- dat %>%
  select(
    n_ic, n_ic1,
    reswage, reswage1,
    city_plus2, year2,
    reg_wgt, reg_wgt1,
    win_frac_ic_5, detind, N_to_U
  )

# Winsorize win_frac_ic_5 using its 0th and 99th percentiles as the bounds.
# NOTE(review): this assumes Winsorize() takes the (lower, upper) bounds as its
# second positional argument -- confirm against the installed version.
dat <- dat %>%
  mutate(
    win_frac_ic_5 = Winsorize(
      win_frac_ic_5,
      quantile(win_frac_ic_5, probs = c(0.00, 0.99), na.rm = TRUE)
    )
  )

# Attach to every row the weighted mean of reswage (weights n_ic) and of
# reswage1 (weights n_ic1) within its year2 x city_plus2 cell.
# `dat` is deliberately left grouped by (year2, city_plus2): the aggregation
# step that follows relies on this grouping.
dat <- dat %>%
  group_by(year2, city_plus2) %>%
  mutate(
    non.union.wage = weighted.mean(reswage, n_ic, na.rm = TRUE),
    union.wage     = weighted.mean(reswage1, n_ic1, na.rm = TRUE)
  )

# Aggregate
# Collapse `dat` (grouped by year2 and city_plus2 at this point) to one row per
# cell: means of the wage / transition / election variables and sums of the
# regression weights.
dat <- dat %>%
  summarize(
    across(
      c(non.union.wage, union.wage, N_to_U, win_frac_ic_5),
      function(v) mean(v, na.rm = TRUE)
    ),
    across(
      c(reg_wgt, reg_wgt1),
      function(v) sum(v, na.rm = TRUE)
    ),
    # Drop all grouping here. With the default ("drop_last") the result stays
    # grouped by year2, and select(-year2) below would silently re-add the
    # column instead of removing it.
    .groups = "drop"
  ) %>%
  mutate(
    # Map the period index to the calendar year used as the join key later on.
    # Any other year2 value becomes NA.
    year = case_when(
      year2 == 1 ~ 1980,
      year2 == 2 ~ 1990,
      year2 == 3 ~ 2000,
      year2 == 4 ~ 2010,
      year2 == 5 ~ 2019
    )
  ) %>%
  select(-year2)



# Combine the city-level table with the aggregated cells (matched on
# city_plus2 and year), then bring in the columns of `city2` (matched on city).
# Inner joins: rows without a match in either step are dropped.
df <- city %>%
  inner_join(dat, by = c("city_plus2", "year")) %>%
  inner_join(city2, by = "city")



# Cities that are highlighted and labelled in the scatter plots.
# The plotting function compares these strings to the `city` column with
# %in%, so each entry has to match a value of that column exactly (including
# the inconsistent spacing after commas).
# NOTE(review): "OH", "PA" and "Dallas-, TX" look like truncated labels --
# confirm them against the values of `city` in the data.
select.cities <- c(
  "Detroit,MI", "Gary-Hammond-East Chicago, IN", "Cleveland,OH", "Akron,OH",
  "Rochester, NY", "Nassau-Suffolk, NY", "Washington, DC-MD", "Sacramento, CA",
  "Pittsburgh, PA", "Chicago,IL", "OH", "PA",
  "Dallas-, TX", "Baltimore, MD", "Philadelphia, PA-NJ", "New York, NY"
)

# Partial-regression (added-variable) scatter of 1980-to-`endyear` changes.
#
# Steps:
#   1. Keep the 1980 and `endyear` rows and compute, within the current
#      grouping of `.data`, last-minus-first differences of N_to_U (dT),
#      win_frac_ic_5 (dE), union.wage (dUW), non.union.wage (dW), the outcome
#      `y` (Y) and the regressor `x` (X). `size` is the first value of `n`.
#   2. Keep city_plus2 units with exactly two rows and drop city "AK".
#   3. On the 1980 rows, residualize Y and X on `form` (weighted by `size`) and
#      regress the Y residuals on the X residuals (weighted by `size`).
#   4. Plot the residuals, the fitted line, and the slope with its HC2
#      standard error; cities in the global `select.cities` are highlighted
#      and labelled.
#
# Arguments:
#   .data    Data frame with columns year, city, city_plus2, n, N_to_U,
#            win_frac_ic_5, union.wage, non.union.wage.
#   y        Name (string) of the outcome column whose change is the
#            left-hand side.
#   x        Name (string) of the column whose change is the regressor of
#            interest.
#   form     Right-hand side of the partialling-out regressions, as a string
#            (e.g. "dT + dE").
#   endyear  Final year of the long difference (default 2019).
#
# Returns a ggplot object. Side effect: prints summary() of the final
# regression.
#
# NOTE(review): the differences in step 1 use first()/last() before any
# group_by(), so they depend on `.data` arriving grouped by city and ordered
# by year within each group -- confirm against how `.data` is built.
union.plot.v2 <- function(.data, y, x, form, endyear = 2019) {
  fig <- .data %>%
    filter(year %in% c(1980, endyear)) %>%
    mutate(
      dT = last(.data[["N_to_U"]]) - first(.data[["N_to_U"]]),
      dE = last(.data[["win_frac_ic_5"]]) - first(.data[["win_frac_ic_5"]]),
      dUW = last(.data[["union.wage"]]) - first(.data[["union.wage"]]),
      dW = last(.data[["non.union.wage"]]) - first(.data[["non.union.wage"]]),
      # Outcome selected by the caller; equals dW when y = "non.union.wage".
      Y = last(.data[[y]]) - first(.data[[y]]),
      X = last(.data[[x]]) - first(.data[[x]]),
      size = first(n)
    ) %>%
    filter(!is.na(dE)) %>%
    group_by(city_plus2) %>%
    mutate(n = n()) %>%
    filter(n == 2, city != "AK") %>%
    ungroup() %>%
    data.frame()

  # One row per city: all regressions are run on the 1980 rows only.
  base <- fig %>% filter(year == 1980)

  # Residualize outcome and regressor on the controls. na.exclude pads the
  # residual vector with NA for dropped rows, so it always lines up with
  # `base` row by row.
  base$lhs <- residuals(
    lm(as.formula(paste("Y ~", form)),
       data = base,
       weights = size,
       na.action = na.exclude)
  )
  base$rhs <- residuals(
    lm(as.formula(paste("X ~", form)),
       data = base,
       weights = size,
       na.action = na.exclude)
  )

  m <- lm(lhs ~ rhs, data = base, weights = size)

  summary(m) %>% print()

  slope <- coef(m)["rhs"]
  # Heteroskedasticity-robust (HC2) standard error of the slope.
  std_err <- coeftest(m, vcov. = vcovHC(m, type = "HC2"))["rhs", "Std. Error"]

  # Formatting annotation text
  annotation_text <- sprintf("Slope: %.2f (%.2f)", slope, std_err)

  # Copy the residuals to every row of `fig` by city key rather than by
  # position, so the result does not depend on the row order of `fig`.
  key <- match(fig$city_plus2, base$city_plus2)
  fig$lhs <- base$lhs[key]
  fig$rhs <- base$rhs[key]

  # `fig` holds two rows per city (1980 and `endyear`) with identical
  # residuals, so each point is drawn twice on top of itself.
  ggplot(fig, aes(y = lhs, x = rhs, size = size / 2)) +
    geom_vline(
      xintercept = 0,
      linetype = 2,
      size = .5,
      alpha = 2 / 5
    ) +
    geom_hline(
      yintercept = 0,
      linetype = 2,
      size = .5,
      alpha = 2 / 5
    ) +
    geom_abline(
      intercept = coef(m)[1],
      slope = coef(m)[2],
      linetype = 1,
      alpha = 1
    ) +
    geom_point(
      data = fig %>% filter(!city %in% select.cities),
      alpha = 1 / 5,
      shape = 19,
      show.legend = FALSE
    ) +
    geom_point(
      data = fig %>% filter(city %in% select.cities),
      # NOTE(review): alpha sits inside aes(), so it is passed through the
      # alpha scale rather than used as a literal opacity; left as is to keep
      # the rendering unchanged -- confirm this is intended.
      mapping = aes(
        y = lhs,
        x = rhs,
        color = city,
        fill = city,
        size = size / 1.5,
        alpha = .66
      ),
      show.legend = FALSE,
      shape = 23
    ) +
    geom_text_repel(
      # Blank out labels of non-selected cities; they stay in the data so the
      # labels are still repelled from their points.
      data = base %>%
        mutate(city = ifelse(city %in% select.cities, city, "")),
      # NOTE(review): `n` is the per-city row count here (always 2 after the
      # filter above), not a population measure -- confirm intended.
      mapping = aes(
        y = lhs,
        x = rhs,
        point.size = n,
        label = city
      ),
      max.time = 60,
      max.iter = 100000,
      max.overlaps = 20,
      force = 3,
      box.padding = .75,
      min.segment.length = 0,
      point.padding = 0,
      # NOTE(review): this call previously received `size` twice (2 and 3);
      # a single value is kept -- confirm the intended label size.
      size = 2
    ) +
    annotate(
      "text",
      x = Inf,
      y = -Inf,
      label = annotation_text,
      hjust = 1.1,
      vjust = -0.5,
      size = 2
    ) +
    scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
    theme(axis.title = element_text(size = 9),
          plot.margin = unit(c(-1, -1.2, -1.2, -1.5), "cm")) +
    guides(color = guide_legend(ncol = 3))
}


# Panel A: change in non-union wages against the change in N_to_U, with the
# change in win_frac_ic_5 (dE) partialled out.
a <- union.plot.v2(
  df,
  y = "non.union.wage",
  x = "N_to_U",
  form = "dE",
  endyear = 2010
) +
  labs(
    x = "Change in nonunion-union transitions",
    y = "Change in log non-union wages",
    tag = "A)"
  )

# Panel B: change in non-union wages against the change in win_frac_ic_5,
# with the change in N_to_U (dT) partialled out.
b <- union.plot.v2(
  df,
  y = "non.union.wage",
  x = "win_frac_ic_5",
  form = "dT ",
  endyear = 2010
) +
  labs(
    x = "Change in election probability",
    y = "Change in log non-union wages",
    tag = "B)"
  )

# Panel C: change in non-union wages against the change in union.wage, with
# dT and dE partialled out. This panel is built (and its regression summary
# printed) but it is not part of the figure saved below.
# NOTE(review): the object name `c` masks base::c as a variable; calls such as
# c(...) still resolve to the function, but a different name would be safer.
c <- union.plot.v2(
  df,
  y = "non.union.wage",
  x = "union.wage",
  form = "dT + dE",
  endyear = 2010
) +
  labs(
    x = "Change in average union wages",
    y = "Change in log non-union wages",
    tag = "C)"
  )


library(patchwork)

# Stack panels A and B vertically and apply a common y-axis range to both.
appendix_fig_4 <- (a / b) &
  ylim(-.2, .2)
appendix_fig_4  # displayed when the script is run interactively

# Pass the composed figure explicitly instead of relying on last_plot(), so
# the saved file does not depend on which plot happened to be built last.
ggsave(paste0(figs, "/Appendix_Figure_4.pdf"),
       plot = appendix_fig_4,
       height = 6,
       width = 8)